Artificial Intelligence untuk Nowcasting

Package

#|output: false
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(timetk)
library(skimr)
library(torch)
library(yardstick)

Attaching package: 'yardstick'

The following object is masked from 'package:readr':

    spec

Import Data

df <- read_excel("Data_PDRB_siap2.xlsx",sheet = 2)
glimpse(df)
Rows: 36
Columns: 9
$ date              <chr> "2015-03-01", "2015-06-01", "2015-09-01", "2015-12-0…
$ ADHK              <dbl> 20908.25, 21772.33, 24374.33, 22283.07, 22642.59, 23…
$ prod_tbg          <dbl> 181240.61, 192835.54, 240285.11, 191611.46, 202663.4…
$ kredit_konstruksi <dbl> 4.272316e+11, 4.523844e+11, 4.848373e+11, 6.085998e+…
$ ekspor            <dbl> 252144503, 376672632, 571238429, 279389810, 37400675…
$ pmi_jepang        <dbl> 51.36667, 50.30000, 51.30000, 52.53333, 50.50000, 48…
$ ihk               <dbl> 98.85533, 98.85300, 98.84140, 98.83007, 98.81210, 98…
$ soi               <dbl> -4.666667, -9.366667, -15.500000, -16.533333, -11.70…
$ ikk               <dbl> 123.73333, 117.86667, 114.16667, 111.03333, 121.5833…
df <- mutate(df,date = ymd(date))
glimpse(df)
Rows: 36
Columns: 9
$ date              <date> 2015-03-01, 2015-06-01, 2015-09-01, 2015-12-01, 201…
$ ADHK              <dbl> 20908.25, 21772.33, 24374.33, 22283.07, 22642.59, 23…
$ prod_tbg          <dbl> 181240.61, 192835.54, 240285.11, 191611.46, 202663.4…
$ kredit_konstruksi <dbl> 4.272316e+11, 4.523844e+11, 4.848373e+11, 6.085998e+…
$ ekspor            <dbl> 252144503, 376672632, 571238429, 279389810, 37400675…
$ pmi_jepang        <dbl> 51.36667, 50.30000, 51.30000, 52.53333, 50.50000, 48…
$ ihk               <dbl> 98.85533, 98.85300, 98.84140, 98.83007, 98.81210, 98…
$ soi               <dbl> -4.666667, -9.366667, -15.500000, -16.533333, -11.70…
$ ikk               <dbl> 123.73333, 117.86667, 114.16667, 111.03333, 121.5833…

Grafik Time Series

Fungsi pembantu

Code
multi_plot_time_series <- function(data,date,exclude_var=NULL,.interactive=TRUE,n_col=2,n_row=2,.title="Multiple Time Series"){
data %>% 
  select(-all_of(exclude_var)) %>% 
  select(all_of(date),where(is.numeric)) %>% 
  pivot_longer(cols = -all_of(date),
               names_to = "variable",
               values_to = "value") %>% 
  group_by(variable) %>% 
    plot_time_series(.data=,
                 .date_var = date,
                 .value = value,
                 .interactive = .interactive,
                 .title =  .title,
                 .facet_ncol = n_col,
                 .facet_nrow = n_row,
                 .smooth = FALSE)
}  

Single Time Series

plot_time_series(.data=df,
                 .date_var = date,
                 .value = ADHK,
                 .interactive = TRUE,
                 .title =  "ADHK",
                 .smooth = FALSE)

Multiple Plot

multi_plot_time_series(df,
                       date = "date",
                       exclude_var = "ADHK",
                       .interactive=FALSE,
                       n_col = 2)

Multi Input Multi Output (MIMO)

Fungsi pembantu

Code
prepare_mimo_data <- function(data,
                              date_col,
                              input_vars,
                              output_vars,
                              lags       = 1:12,
                              horizon    = 1,
                              remove_na  = TRUE) {
  
  # Tidyeval the date column
  date_col <- rlang::enquo(date_col)
  
  # 1) Order by time index
  df_prep <- data %>%
    dplyr::arrange(!!date_col)
  
  # 2) Generate lagged inputs via timetk
  #    Creates columns like: sales_lag1, sales_lag2, ..., price_lag1, ...
  df_prep <- df_prep %>%
    timetk::tk_augment_lags(
      .value = all_of(input_vars),
      .lags  = lags
    )
  # 3) Generate future targets via dplyr::lead()
  #    Creates columns like: sales_h1, sales_h2, ...
  df_prep <- df_prep %>%
    timetk::tk_augment_leads(
      .value = all_of(output_vars),
      .lags  = -horizon
    )
  
      # Build vector of all generated column names
    lag_cols    <- df_prep %>% select(contains("lag")) %>% names()
    lead_cols   <- df_prep %>% select(contains("lead")) %>% names()
    all_new_cols <- c(sort(lag_cols,decreasing = TRUE), lead_cols)
  # 4) Optionally drop rows with NAs in any of the new columns
  if (remove_na) {
    
    df_prep <- df_prep %>%
      tidyr::drop_na(dplyr::all_of(all_new_cols))
  }
  
  # Return the prepared tibble
  df_prep <- df_prep %>% 
              dplyr::select(!!date_col,
                     dplyr::all_of(all_new_cols)) %>% 
              dplyr::rename("date_lg0"=!!date_col)
  #nm_df_prep <- df_prep %>% select(-!!date_col) %>% names()
  #date_nm <- df_prep %>% select(!!date_col) %>% names()
  #names(df_prep) <- c(date_nm,sort(nm_df_prep,decreasing = FALSE))
  return(df_prep)
}

Struktur Data MIMO

prepare_mimo_data(data = df,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:2,
                  remove_na = FALSE,
                  horizon = 1:4)

LSTM

Fungsi pembantu LSTM

Code
train_lstm_mimo <- function(data,
                            date_col,
                            input_cols,
                            output_cols,
                            val_split    = 0.1,
                            epochs       = 50,
                            patience     = 10,
                            min_delta    = 1e-4,
                            batch_size   = 32,
                            lr           = 1e-3,
                            optimizer    = c("adam","sgd"),
                            hidden_size  = 50,
                            num_layers   = 1,
                            activation   = c("tanh","relu","linear"),
                            dropout      = 0.0,
                            weight_decay = 0.0) {
  optimizer <- match.arg(optimizer)
  activation <- match.arg(activation)
  date_col   <- rlang::ensym(date_col)

  # 1) Order by time index
  data <- data %>% arrange(!!date_col)
  data0 <- data
  data <- data %>% drop_na()

  # 2) Split data
  n     <- nrow(data)
  n_val <- floor(val_split * n)
  train_df <- data[1:(n - n_val), ]
  val_df   <- data[(n - n_val + 1):n, ]

  # 3) Compute robust scaler on train_df
  input_median  <- sapply(input_cols, function(col) median(train_df[[col]], na.rm = TRUE))
  input_iqr     <- sapply(input_cols, function(col) IQR(train_df[[col]], na.rm = TRUE))
  output_median <- sapply(output_cols,function(col) median(train_df[[col]], na.rm = TRUE))
  output_iqr    <- sapply(output_cols,function(col) IQR(train_df[[col]], na.rm = TRUE))
  scaler <- list(
    input_median  = input_median,
    input_iqr     = input_iqr,
    output_median = output_median,
    output_iqr    = output_iqr
  )

  # 4) Apply scaling to train and validation sets
  for (col in input_cols) {
    train_df[[col]] <- (train_df[[col]] - scaler$input_median[col]) / scaler$input_iqr[col]
    val_df[[col]]   <- (val_df[[col]]   - scaler$input_median[col]) / scaler$input_iqr[col]
  }
  for (col in output_cols) {
    train_df[[col]] <- (train_df[[col]] - scaler$output_median[col]) / scaler$output_iqr[col]
    val_df[[col]]   <- (val_df[[col]]   - scaler$output_median[col]) / scaler$output_iqr[col]
  }

  # 5) Define the LSTM module
  LSTMModel <- nn_module(
    "LSTMModel",
    initialize = function(input_size, hidden_size, num_layers, dropout, output_size, activation) {
      self$lstm <- nn_lstm(
        input_size  = input_size,
        hidden_size = hidden_size,
        num_layers  = num_layers,
        batch_first = TRUE,
        dropout     = dropout
      )
      self$fc  <- nn_linear(hidden_size, output_size)
      self$act <- switch(
        activation,
        tanh   = nn_tanh(),
        relu   = nn_relu(),
        linear = nn_identity()
      )
    },
    forward = function(x) {
      out    <- self$lstm(x)
      h_last <- out[[1]][ , dim(out[[1]])[2], ]
      h_act  <- self$act(h_last)
      self$fc(h_act)
    }
  )

  # 6) Prepare torch datasets
  make_ds <- function(df) {
    x_mat <- as.matrix(df[, input_cols])
    y_mat <- as.matrix(df[, output_cols])
    X <- torch_tensor(x_mat, dtype = torch_float())$view(c(nrow(x_mat), -1, length(input_cols)))
    Y <- torch_tensor(y_mat, dtype = torch_float())
    list(x = X, y = Y)
  }
  train_ds <- make_ds(train_df)
  val_ds   <- make_ds(val_df)

  # 7) Instantiate model and optimizer
  model <- LSTMModel(
    input_size  = length(input_cols),
    hidden_size = hidden_size,
    num_layers  = num_layers,
    dropout     = dropout,
    output_size = length(output_cols),
    activation  = activation
  )
  optim <- switch(
    optimizer,
    adam = optim_adam(model$parameters, lr = lr, weight_decay = weight_decay),
    sgd  = optim_sgd(model$parameters, lr = lr, weight_decay = weight_decay)
  )
  criterion <- nn_smooth_l1_loss()

  # 8) Training loop
  train_loss <- numeric(epochs)
  val_loss   <- numeric(epochs)

   # Early stopping state
   best_loss <- Inf
    wait      <- 0
  for (e in seq_len(epochs)) {
    model$train()
    optim$zero_grad()
    preds_train <- model(train_ds$x)
    loss_train  <- criterion(preds_train, train_ds$y)
    loss_train$backward()
    optim$step()
    train_loss[e] <- loss_train$item()

    model$eval()
    with_no_grad({
      preds_val    <- model(val_ds$x)
      val_loss[e]  <- criterion(preds_val, val_ds$y)$item()
    })
    # — Early stopping check —
    if (val_loss[e] < best_loss - min_delta) {
      best_loss <- val_loss[e]
      wait      <- 0
    } else {
      wait <- wait + 1
      if (wait >= patience) {
        message("Stopping early at epoch ", e, 
                " (no improvement for ", patience, " epochs).")
        break
      }
    }
  }
  # trim losses if we stopped early
  train_loss <- train_loss[1:e]
  val_loss   <- val_loss[1:e]

  list(
    model       = model,
    train_loss  = train_loss,
    val_loss    = val_loss,
    scaler      = scaler,
    data0       = data0,
    input_cols  = input_cols,
    output_cols = output_cols,
    date_col    = rlang::as_string(date_col)
  )
}
Code
plot_lstm_history <- function(history) {
  df <- tibble(
    epoch = seq_along(history$train_loss),
    training = sqrt(history$train_loss),
    validation = sqrt(history$val_loss)
  ) %>%
    pivot_longer(-epoch, names_to = "data", values_to = "loss")

  ggplot(df, aes(epoch, loss, color = data)) +
    geom_line(size = 1) +
    labs(
      title = "Training vs Validation Loss",
      x     = "Epoch",
      y     = "RMSE"
    ) +
    theme_minimal()
}
Code
predict_lstm <- function(history, new_data) {
  model       <- history$model
  scaler      <- history$scaler
  input_cols  <- history$input_cols
  output_cols <- history$output_cols
  date_col    <- history$date_col

  dates <- new_data[[date_col]]
  x_mat <- as.matrix(new_data[, input_cols])
  X     <- torch_tensor(x_mat, dtype = torch_float())$view(c(nrow(x_mat), -1, ncol(x_mat)))

  model$eval()
  with_no_grad({ pred_scaled <- model(X) })
  pred_scaled_mat <- as.matrix(pred_scaled)

  # Inverse robust scaling
  pred_orig <- sweep(pred_scaled_mat, 2, scaler$output_iqr, `*`)
  pred_orig <- sweep(pred_orig, 2, scaler$output_median, `+`)

  # Build output tibble
  pred_df <- as.data.frame(pred_orig)
  names(pred_df) <- output_cols
  return(pred_df)
}
Code
forecast_lstm <- function(history,real_out_cols){
forecast <- predict_lstm(history = history,
                           new_data = history$data0 %>% slice_tail(n = 1))

ncol_frct <- ncol(forecast)
forecast <- forecast %>% 
            pivot_longer(everything(),values_to = "value") %>% 
            mutate(type="forecast",name=NULL)

df_dt <-  history$data0 %>% 
              select(where(is.Date)) %>% 
              pull()

date_smry <- tk_get_timeseries_summary(df_dt)
future_date <- tk_make_timeseries(df_dt %>% tail(1),
                                  length_out = 1+ncol_frct,
                                  by = date_smry$scale)
future_date <- future_date[-1]
future_date <- tibble(date=future_date)

result <- bind_cols(future_date,forecast)

past_data <- history$data0 %>% 
              select(where(is.Date),all_of(real_out_cols)) %>% 
              mutate(type="actual") %>% 
              magrittr::set_names(c("date","value","type"))
result <- bind_rows(past_data,result)
return(result)
}
forecast_lstm_plot <- function(result, test_data, interactive = FALSE) {
  full_data <- result %>%
    bind_rows(test_data %>%
                mutate(type = "actual") %>%
                magrittr::set_names(c("date", "value", "type")))
  n_row <- nrow(test_data)
  if (n_row == 1) {
    p <- full_data %>%
      filter(type == "actual") %>%
      ggplot(aes(x = date, y = value, colour = type)) +
      geom_line() +
      ggtitle("Forecast Plot") +
      geom_point(data = full_data  %>% filter(type == "forecast")) +
      theme_bw() +
      theme(legend.position = "none")
    if (interactive) {
      plotly::ggplotly(p)
    } else{
      p
    }
  } else{
  p <- full_data %>%
        filter(type == "actual") %>%
        ggplot(aes(x = date, y = value, colour = type)) +
        geom_line() +
        ggtitle("Forecast Plot") +
        geom_line(data = full_data  %>% filter(type == "forecast")) +
        theme_bw() +
        theme(legend.position = "none")
  if (interactive) {
      plotly::ggplotly(p)
    } else{
      p
    }
  }
}

Single Input Single Output

Pembagian data

train_df1 <-  df %>% 
            filter(date<="2023-09-01")
train_df1$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01" "2023-09-01"
test_df1 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-09-01")
test_df1$date
[1] "2023-12-01"

Reshaping MIMO Data

train_df1 <- prepare_mimo_data(data = train_df1,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0,
                  remove_na = FALSE,
                  horizon = 1)
train_df1
test_df1

Modeling

input_cols <- names(select(train_df1,contains("lag")))
input_cols
[1] "ADHK_lag0"
output_cols <- names(select(train_df1,contains("lead")))
output_cols
[1] "ADHK_lead1"
set.seed(2045)
mod1 <- train_lstm_mimo(data = train_df1,
                     input_cols = input_cols,
                     output_cols = output_cols,
                     date_col = "date_lg0",
                     val_split = 0.05,
                     epochs = 1000,
                     patience = 50,
                     batch_size = nrow(train_df1),
                     optimizer = "adam",
                     hidden_size = 200,
                     num_layers = 5,
                     activation = "tanh")
Stopping early at epoch 81 (no improvement for 50 epochs).
mod1$model
An `nn_module` containing 1,449,001 parameters.

── Modules ─────────────────────────────────────────────────────────────────────
• lstm: <nn_lstm> #1,448,800 parameters
• fc: <nn_linear> #201 parameters
• act: <nn_tanh> #0 parameters
train_df1 %>% slice_tail(n=1)
predict_lstm(history = mod1,
             new_data = train_df1 %>% slice_tail(n=1)
             )
res1 <- forecast_lstm(history = mod1,
                     real_out_cols="ADHK_lag0")
res1 %>% 
  filter(type=="forecast")
test_df1
rmse_vec(truth = test_df1$ADHK,
         estimate = filter(res1,type=="forecast") %>% pull(value))
[1] 776.7902
mape_vec(truth = test_df1$ADHK,
         estimate = filter(res1,type=="forecast") %>% pull(value))
[1] 2.891723
forecast_lstm_plot(res1,test_data = test_df1,
                   interactive = TRUE)

Multi Input Single Output

Pembagian data

train_df2 <-  df %>% 
            filter(date<="2023-09-01")
train_df2$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01" "2023-09-01"
test_df2 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-09-01")
test_df2$date
[1] "2023-12-01"

Reshaping MIMO Data

train_df2 <- prepare_mimo_data(data = train_df2,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:4,
                  remove_na = FALSE,
                  horizon = 1)
train_df2
test_df2

Modeling

input_cols2 <- names(select(train_df2,contains("lag")))
input_cols2
[1] "ADHK_lag4" "ADHK_lag3" "ADHK_lag2" "ADHK_lag1" "ADHK_lag0"
output_cols2 <- names(select(train_df2,contains("lead")))
output_cols2
[1] "ADHK_lead1"
set.seed(2045)
mod2 <- train_lstm_mimo(data = train_df2,
                     input_cols = input_cols2,
                     output_cols = output_cols2,
                     date_col = "date_lg0",
                     val_split = 0.05,
                     epochs = 1000,
                     patience = 50,
                     batch_size = nrow(train_df2),
                     optimizer = "adam",
                     hidden_size = 200,
                     num_layers = 5,
                     activation = "tanh")
Stopping early at epoch 84 (no improvement for 50 epochs).
mod2$model
An `nn_module` containing 1,452,201 parameters.

── Modules ─────────────────────────────────────────────────────────────────────
• lstm: <nn_lstm> #1,452,000 parameters
• fc: <nn_linear> #201 parameters
• act: <nn_tanh> #0 parameters
train_df2 %>% slice_tail(n=1)
predict_lstm(history = mod2,
             new_data = train_df2 %>% slice_tail(n=1)
             )
res2 <- forecast_lstm(history = mod2,
                     real_out_cols="ADHK_lag0")
res2 %>% 
  filter(type=="forecast")
test_df2
rmse_vec(truth = test_df2$ADHK,
         estimate = filter(res2,type=="forecast") %>% pull(value))
[1] 892.3806
mape_vec(truth = test_df2$ADHK,
         estimate = filter(res2,type=="forecast") %>% pull(value))
[1] 3.322027
forecast_lstm_plot(res2,test_data = test_df2,
                   interactive = TRUE)

Single Input Multi Output

Pembagian data

train_df3 <-  df %>% 
            filter(date<="2023-06-01")
train_df3$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01"
test_df3 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-06-01")
test_df3$date
[1] "2023-09-01" "2023-12-01"

Reshaping MIMO Data

train_df3 <- prepare_mimo_data(data = train_df3,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:1,
                  remove_na = FALSE,
                  horizon = 1:2)
train_df3
test_df3

Modeling

input_cols3 <- names(select(train_df3,contains("lag")))
input_cols3
[1] "ADHK_lag1" "ADHK_lag0"
output_cols3 <- names(select(train_df3,contains("lead")))
output_cols3
[1] "ADHK_lead1" "ADHK_lead2"
set.seed(2045)
mod3 <- train_lstm_mimo(data = train_df3,
                     input_cols = input_cols3,
                     output_cols = output_cols3,
                     date_col = "date_lg0",
                     val_split = 0.05,
                     epochs = 1000,
                     patience = 50,
                     batch_size = nrow(train_df3),
                     optimizer = "adam",
                     hidden_size = 200,
                     num_layers = 5,
                     activation = "tanh")
Stopping early at epoch 83 (no improvement for 50 epochs).
mod3$model
An `nn_module` containing 1,450,002 parameters.

── Modules ─────────────────────────────────────────────────────────────────────
• lstm: <nn_lstm> #1,449,600 parameters
• fc: <nn_linear> #402 parameters
• act: <nn_tanh> #0 parameters
train_df3 %>% slice_tail(n=1)
predict_lstm(history = mod3,
             new_data = train_df3 %>% slice_tail(n=1)
             )
res3 <- forecast_lstm(history = mod3,
                     real_out_cols="ADHK_lag0")
res3 %>% 
  filter(type=="forecast")
test_df3
rmse_vec(truth = test_df3$ADHK,
         estimate = filter(res3,type=="forecast") %>% pull(value))
[1] 931.7282
mape_vec(truth = test_df3$ADHK,
         estimate = filter(res3,type=="forecast") %>% pull(value))
[1] 2.970178
forecast_lstm_plot(res3,test_data = test_df3,
                   interactive = TRUE)

Multi Input Multi Output

Pembagian data

train_df4 <-  df %>% 
            filter(date<="2023-06-01")
train_df4$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01" "2023-06-01"
test_df4 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-06-01")
test_df4$date
[1] "2023-09-01" "2023-12-01"

Reshaping MIMO Data

train_df4 <- prepare_mimo_data(data = train_df4,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:4,
                  remove_na = FALSE,
                  horizon = 1:2)
train_df4
test_df4

Modeling

input_cols4 <- names(select(train_df4,contains("lag")))
input_cols4
[1] "ADHK_lag4" "ADHK_lag3" "ADHK_lag2" "ADHK_lag1" "ADHK_lag0"
output_cols4 <- names(select(train_df4,contains("lead")))
output_cols4
[1] "ADHK_lead1" "ADHK_lead2"
set.seed(2045)
mod4 <- train_lstm_mimo(data = train_df4,
                     input_cols = input_cols4,
                     output_cols = output_cols4,
                     date_col = "date_lg0",
                     val_split = 0.05,
                     epochs = 1000,
                     patience = 50,
                     batch_size = nrow(train_df4),
                     optimizer = "adam",
                     hidden_size = 200,
                     num_layers = 5,
                     activation = "tanh")
Stopping early at epoch 77 (no improvement for 50 epochs).
mod4$model
An `nn_module` containing 1,452,402 parameters.

── Modules ─────────────────────────────────────────────────────────────────────
• lstm: <nn_lstm> #1,452,000 parameters
• fc: <nn_linear> #402 parameters
• act: <nn_tanh> #0 parameters
train_df4 %>% slice_tail(n=1)
predict_lstm(history = mod4,
             new_data = train_df4 %>% slice_tail(n=1)
             )
res4 <- forecast_lstm(history = mod4,
                     real_out_cols="ADHK_lag0")
res4 %>% 
  filter(type=="forecast")
test_df4
rmse_vec(truth = test_df4$ADHK,
         estimate = filter(res4,type=="forecast") %>% pull(value))
[1] 954.1313
mape_vec(truth = test_df4$ADHK,
         estimate = filter(res4,type=="forecast") %>% pull(value))
[1] 3.415742
forecast_lstm_plot(res4,test_data = test_df4,
                   interactive = TRUE)

Multi Input Multi Output Part2

Pembagian data

train_df5 <-  df %>% 
            filter(date<="2023-03-01")
train_df5$date
 [1] "2015-03-01" "2015-06-01" "2015-09-01" "2015-12-01" "2016-03-01"
 [6] "2016-06-01" "2016-09-01" "2016-12-01" "2017-03-01" "2017-06-01"
[11] "2017-09-01" "2017-12-01" "2018-03-01" "2018-06-01" "2018-09-01"
[16] "2018-12-01" "2019-03-01" "2019-06-01" "2019-09-01" "2019-12-01"
[21] "2020-03-01" "2020-06-01" "2020-09-01" "2020-12-01" "2021-03-01"
[26] "2021-06-01" "2021-09-01" "2021-12-01" "2022-03-01" "2022-06-01"
[31] "2022-09-01" "2022-12-01" "2023-03-01"
test_df5 <-  df %>% 
            select(date,ADHK) %>% 
            filter(date>"2023-03-01")
test_df5$date
[1] "2023-06-01" "2023-09-01" "2023-12-01"

Reshaping MIMO Data

train_df5 <- prepare_mimo_data(data = train_df5,
                  date_col = date,
                  input_vars = c("ADHK"),
                  output_vars = c("ADHK"),
                  lags = 0:5,
                  remove_na = FALSE,
                  horizon = 1:3)
train_df5
test_df5

Modeling

input_cols5 <- names(select(train_df5,contains("lag")))
input_cols5
[1] "ADHK_lag5" "ADHK_lag4" "ADHK_lag3" "ADHK_lag2" "ADHK_lag1" "ADHK_lag0"
output_cols5 <- names(select(train_df5,contains("lead")))
output_cols5
[1] "ADHK_lead1" "ADHK_lead2" "ADHK_lead3"
set.seed(2045)
mod5 <- train_lstm_mimo(data = train_df5,
                     input_cols = input_cols5,
                     output_cols = output_cols5,
                     date_col = "date_lg0",
                     val_split = 0.05,
                     epochs = 1000,
                     patience = 50,
                     batch_size = nrow(train_df5),
                     optimizer = "adam",
                     hidden_size = 200,
                     num_layers = 5,
                     activation = "tanh")
Stopping early at epoch 126 (no improvement for 50 epochs).
mod5$model
An `nn_module` containing 1,453,403 parameters.

── Modules ─────────────────────────────────────────────────────────────────────
• lstm: <nn_lstm> #1,452,800 parameters
• fc: <nn_linear> #603 parameters
• act: <nn_tanh> #0 parameters
train_df5 %>% slice_tail(n=1)
predict_lstm(history = mod5,
             new_data = train_df5 %>% slice_tail(n=1)
             )
res5 <- forecast_lstm(history = mod5,
                     real_out_cols="ADHK_lag0")
res5 %>% 
  filter(type=="forecast")
test_df5
rmse_vec(truth = test_df5$ADHK,
         estimate = filter(res5,type=="forecast") %>% pull(value))
[1] 1289.204
mape_vec(truth = test_df5$ADHK,
         estimate = filter(res5,type=="forecast") %>% pull(value))
[1] 4.415434
forecast_lstm_plot(res5,test_data = test_df5,
                   interactive = TRUE)